home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
ast_comp
/
cpp-kit.lha
/
c++kit
/
Scan.H
< prev
next >
Wrap
C/C++ Source or Header
|
1993-04-11
|
3KB
|
129 lines
/* 9th April, 1993 Mayan Moudgill
* Basically, a Scan is given a file. If asked, it will try to scan one of the
* following tokens from the file.
* strings: "([^"\\\n]|(\.))*"
* identifier: [A-Za-z_0-9]+
* integer: [-+]?[0-9]+
* character: .
* Each of these returns a Token.
* It can also accept a mark, which returns a Mark, and a reject which
* when given a Mark, rolls back the state to the Mark, and starts matching
* from that point onwards. Also, for convenience it can rollback the
* last token. Also, it can match a character and a character-string.
* (these are equivalent to trying to scan a character/string,
* and rolling back if the character/string does not exactly match the
* argument).
*
* The other facility that the function provides is that it keeps track
* of the number of lines read in.
*
* I can think of at least two ways of implementing the tokenizing
* and rollback mechanisms: reading files, and mmap'ing them. This
* implementation uses mmap.
*/
#include <osfcn.h>
#include <stdlib.h>
#include <iostream.h>
#include "Token.H"
/* Scan -- tokenizer with mark/rollback over the contents of a file.
 *
 * Positions are char pointers into the file image (see _file and _end).
 * Every scan attempt works on a tentative position (_tat/_tnl) that is
 * initialised by _start(), advanced by _get(), stepped back by _backup()
 * and only made permanent by _commit().  _commit() also remembers the
 * previously committed position so that back() can undo the last commit.
 *
 * NOTE(review): the class declares a destructor and holds a file
 * descriptor and a mapped region, but copying is not disabled here;
 * presumably copying a Scan would release those resources twice --
 * confirm against ~Scan() before passing a Scan by value.
 */
class Scan {
public:
    /* Snapshot of a committed scanner position.  Obtained from
     * Scan::mark() and handed back to Scan::back(Mark) to roll back.
     * The fields are only accessible to Scan.
     */
    class Mark {
        friend class Scan;
    private:
        int _nl;        // value of Scan::_nl when the mark was taken
        char * _at;     // value of Scan::_at when the mark was taken
    };
private:
    char _name[256];    // file name
    int _fd;            // file descriptor
    char * _file;       // the pointer to the beginning of the mmap'd region
    long _size;         // the file size
    char * _end;        // end of file
    int _close;         // has it not been opened?
    int _eof;           // seen eof?
    char * _at;         // file pointer (committed position)
    int _nl;            // new lines read in (committed count)
    char * _oat;        // previous file pointer (for rollback)
    int _onl;           // previous new lines
    char * _wat;        // copy of _at taken by _start(); not otherwise used in this header
    char * _tat;        // tentative position used by _get()/_backup()
    int _wnl;           // copy of _nl taken by _start(); not otherwise used in this header
    int _tnl;           // tentative new-line count matching _tat
private:
    /* Begin a scan attempt: reset the tentative state to the committed
     * state.  Returns non-zero when the committed position is already at
     * _end or when _close is set, zero otherwise.
     */
    int _start()
    {
        _wat = _tat = _at;
        _wnl = _tnl = _nl;
        return _at == _end || _close;
    }

    /* Read one character at the tentative position and advance past it.
     * Returns EOF, without advancing, when the tentative position is at
     * _end.  The character is converted through unsigned char (the same
     * convention as getc) so that a 0xFF byte cannot be mistaken for EOF
     * on platforms where plain char is signed, and so the result is never
     * negative for a real character.
     */
    int _get()
    {
        if( _tat == _end ) {
            return EOF;
        }
        int c = (unsigned char) *_tat++;
        if( c == '\n' ) {
            _tnl++;
        }
        return c;
    }

    /* Step the tentative position back by one character, undoing the
     * new-line count if the character stepped over is a new line.
     * Does nothing when the tentative position is already at the start
     * of the file image, so the pointer never moves in front of _file.
     * Note that _get() does not advance when it returns EOF, so a
     * _backup() after an EOF steps over the last real character.
     */
    void _backup()
    {
        if( _tat == _file ) {
            return;
        }
        _tat--;
        if( *_tat == '\n' ) {
            _tnl--;
        }
    }

    /* Make the tentative position permanent.  The position that was
     * committed before is kept in _oat/_onl so that back() can restore it.
     */
    void _commit()
    {
        _oat = _at;
        _onl = _nl;
        _at = _tat;
        _nl = _tnl;
    }

    int _space();       // defined elsewhere
public:
    Scan();
    Scan(char * name);
    ~Scan();

    /* Number of new lines counted up to the committed position. */
    int line() const
    { return _nl; }

    /* Capture the committed position so it can be restored later with
     * back(Mark).
     */
    Mark mark() const
    {
        Mark here;
        here._nl = _nl;
        here._at = _at;
        return here;
    }

    /* Roll the scanner back to a previously captured mark.  The "previous"
     * position used by back() is reset to the same place, so a following
     * back() leaves the scanner at the mark.
     */
    void back( const Mark& mark)
    {
        _onl = _nl = mark._nl;
        _oat = _at = mark._at;
    }

    /* Undo the most recent _commit(): restore the position and new-line
     * count that were saved by it.
     */
    void back()
    {
        _nl = _onl;
        _at = _oat;
    }

    /* now for the actual scan routines */
    /* scan_char & scan_string are special cases */
    /* NOTE(review): no scan_char/scan_string exist in this header; the
     * comment above presumably refers to the two match() overloads.
     * All of the routines below are defined elsewhere.
     */
    int match(char c, Token& result);
    int match(char *, Token&);
    int number(Token&);
    int string(Token&);
    int identifier(Token&);
    int token(Token&);
    int character(Token& result);
    int eof();
};